{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# LAB 06.01 - Clustering companies"]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": ["!wget --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/20201.xai4eng/master/content/init.py\n", "import init; init.init(force_download=False); init.get_weblink()\n"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": ["from local.lib.rlxmoocapi import submit, session\n", "student = session.Session(init.endpoint).login( course_id=init.course_id, \n", " session_id=\"UDEA\", \n", " lab_id=\"L06.01\" )\n", "init.get_weblink()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Dataset\n", "\n", "Observe the following dataset, which contains daily stock data for different companies (one row per company, one column per date)."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": ["import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from IPython.display import Image\n", "%matplotlib inline"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [{"data": {"text/plain": ["(60, 963)"]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["\n", "d = pd.read_csv(\"local/data/company-stock-movements-2010-2015-incl.csv.gz\", index_col=0)\n", "d.shape\n"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", " | 2010-01-04 | \n", "2010-01-05 | \n", "2010-01-06 | \n", "2010-01-07 | \n", "2010-01-08 | \n", "2010-01-11 | \n", "2010-01-12 | \n", "2010-01-13 | \n", "2010-01-14 | \n", "2010-01-15 | \n", "... | \n", "2013-10-16 | \n", "2013-10-17 | \n", "2013-10-18 | \n", "2013-10-21 | \n", "2013-10-22 | \n", "2013-10-23 | \n", "2013-10-24 | \n", "2013-10-25 | \n", "2013-10-28 | \n", "2013-10-29 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Apple | \n", "0.580000 | \n", "-0.220005 | \n", "-3.409998 | \n", "-1.170000 | \n", "1.680011 | \n", "-2.689994 | \n", "-1.469994 | \n", "2.779997 | \n", "-0.680003 | \n", "-4.999995 | \n", "... | \n", "0.320008 | \n", "4.519997 | \n", "2.899987 | \n", "9.590019 | \n", "-6.540016 | \n", "5.959976 | \n", "6.910011 | \n", "-5.359962 | \n", "0.840019 | \n", "-19.589981 | \n", "
AIG | \n", "-0.640002 | \n", "-0.650000 | \n", "-0.210001 | \n", "-0.420000 | \n", "0.710001 | \n", "-0.200001 | \n", "-1.130001 | \n", "0.069999 | \n", "-0.119999 | \n", "-0.500000 | \n", "... | \n", "0.919998 | \n", "0.709999 | \n", "0.119999 | \n", "-0.480000 | \n", "0.010002 | \n", "-0.279998 | \n", "-0.190003 | \n", "-0.040001 | \n", "-0.400002 | \n", "0.660000 | \n", "
Amazon | \n", "-2.350006 | \n", "1.260009 | \n", "-2.350006 | \n", "-2.009995 | \n", "2.960006 | \n", "-2.309997 | \n", "-1.640007 | \n", "1.209999 | \n", "-1.790001 | \n", "-2.039994 | \n", "... | \n", "2.109985 | \n", "3.699982 | \n", "9.570008 | \n", "-3.450013 | \n", "4.820008 | \n", "-4.079986 | \n", "2.579986 | \n", "4.790009 | \n", "-1.760009 | \n", "3.740021 | \n", "
American express | \n", "0.109997 | \n", "0.000000 | \n", "0.260002 | \n", "0.720002 | \n", "0.190003 | \n", "-0.270001 | \n", "0.750000 | \n", "0.300004 | \n", "0.639999 | \n", "-0.130001 | \n", "... | \n", "0.680001 | \n", "2.290001 | \n", "0.409996 | \n", "-0.069999 | \n", "0.100006 | \n", "0.069999 | \n", "0.130005 | \n", "1.849999 | \n", "0.040001 | \n", "0.540001 | \n", "
Boeing | \n", "0.459999 | \n", "1.770000 | \n", "1.549999 | \n", "2.690003 | \n", "0.059997 | \n", "-1.080002 | \n", "0.360000 | \n", "0.549999 | \n", "0.530002 | \n", "-0.709999 | \n", "... | \n", "1.559997 | \n", "2.480003 | \n", "0.019997 | \n", "-1.220001 | \n", "0.480003 | \n", "3.020004 | \n", "-0.029999 | \n", "1.940002 | \n", "1.130005 | \n", "0.309998 | \n", "
5 rows \u00d7 963 columns
\n", "